package it.mengoni.persistence.db;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.SortedMap;

/**
 * Singleton helper that re-decodes the characters of a {@link String} under a
 * different charset.
 *
 * <p>All conversions go through {@link #convert(String, String, String)}: the
 * input is first turned back into bytes using the <em>platform default</em>
 * charset, those bytes are decoded as the source charset, and the result is
 * round-tripped through the target charset. The outcome therefore depends on
 * the default charset of the running JVM.
 *
 * <p>The class declares no instance fields; the single instance is obtained
 * through {@link #getInstance()}.
 */
public class StringCharsetUtils {

	/** Charset name for UTF-8. */
	public static final String UTF_8 = "UTF-8";
	/** Charset name for ISO-8859-1 (Latin-1). */
	public static final String ISO_8859_1 = "ISO-8859-1";
	/** Charset name for windows-1250 (see the table at the end of this class). */
	public static final String WINDOWS_1250 = "windows-1250";

	private static final StringCharsetUtils instance = new StringCharsetUtils();

	/** Not instantiable from outside; use {@link #getInstance()}. */
	private StringCharsetUtils(){
	}

	/**
	 * Returns the shared instance of this helper.
	 *
	 * @return the singleton instance
	 */
	public static StringCharsetUtils getInstance() {
		return instance;
	}

	/**
	 * Re-decodes {@code value} as windows-1250 and targets UTF-8.
	 *
	 * @param value the text to convert, may be {@code null}
	 * @return the converted text, or {@code null} if {@code value} is {@code null}
	 */
	public String convertWin1250ToUtf8(String value){
		return convert(value, WINDOWS_1250, UTF_8);
	}

	/**
	 * Re-decodes {@code strIso88591} as ISO-8859-1 and targets UTF-8.
	 *
	 * @param strIso88591 the text to convert, may be {@code null}
	 * @return the converted text, or {@code null} if the input is {@code null}
	 */
	public String convertToUtf8(String strIso88591){
		return convert(strIso88591, ISO_8859_1, UTF_8);
	}

	/**
	 * Re-decodes {@code strUtf8} as UTF-8 and targets ISO-8859-1.
	 *
	 * @param strUtf8 the text to convert, may be {@code null}
	 * @return the converted text, or {@code null} if the input is {@code null}
	 */
	public String convertToIso88591(String strUtf8){
		return convert(strUtf8, UTF_8, ISO_8859_1);
	}

	/**
	 * Re-decodes {@code value} as {@code fromCS} and targets UTF-8.
	 *
	 * @param value  the text to convert, may be {@code null}
	 * @param fromCS name of the source charset
	 * @return the converted text, {@code null} if {@code value} is {@code null},
	 *         or {@code value} unchanged if {@code fromCS} is not supported
	 */
	public String convertToUtf8(String value, String fromCS){
		// Source charset first, UTF-8 as target (the arguments used to be swapped).
		return convert(value, fromCS, UTF_8);
	}

	/**
	 * Re-decodes {@code value} from one charset and round-trips it through another.
	 *
	 * <p>Steps: encode {@code value} with the platform default charset, decode
	 * those bytes as {@code fromCS}, then encode and decode the result with
	 * {@code toCS}. The last step leaves the text unchanged except for characters
	 * {@code toCS} cannot encode, which the JDK typically substitutes (e.g. with
	 * {@code '?'}).
	 *
	 * @param value  the text to convert, may be {@code null}
	 * @param fromCS name of the charset used to decode the platform bytes
	 * @param toCS   name of the target charset
	 * @return the converted text; {@code null} if {@code value} is {@code null};
	 *         {@code value} unchanged if either charset name is not supported
	 */
	public String convert(String value, String fromCS, String toCS){
		if (value == null) {
			return null;
		}
		try {
			// Explicitly the platform default: this is what the no-arg getBytes() does.
			byte[] platformBytes = value.getBytes(Charset.defaultCharset());
			String decoded = new String(platformBytes, fromCS);
			return new String(decoded.getBytes(toCS), toCS);
		} catch (UnsupportedEncodingException e) {
			// Best effort: an unknown charset name leaves the input untouched.
			return value;
		}
	}

	/**
	 * Small demo: prints the display name of every available charset, then
	 * decodes the UTF-8 byte sequence {@code C3 A2} and re-encodes it as
	 * ISO-8859-1, printing the text after each step.
	 *
	 * @param args ignored
	 * @throws UnsupportedEncodingException if ISO-8859-1 is not supported by the JVM
	 */
	public static void main(String[] args) throws UnsupportedEncodingException {
		SortedMap<String, Charset> acs = Charset.availableCharsets();

		for (Charset cs : acs.values()) {
			System.out.println(cs.displayName());
		}

		Charset utf8charset = Charset.forName(UTF_8);
		Charset iso88591charset = Charset.forName(ISO_8859_1);

		ByteBuffer inputBuffer = ByteBuffer.wrap(new byte[]{(byte)0xC3, (byte)0xA2});

		// decode UTF-8
		CharBuffer data = utf8charset.decode(inputBuffer);
		// toString() yields only the decoded characters; array() would expose the
		// whole backing array, which can be larger than the content.
		System.out.println(data.toString());

		// encode ISO-8859-1
		ByteBuffer outputBuffer = iso88591charset.encode(data);
		// Copy just the encoded bytes rather than the full backing array.
		byte[] outputData = new byte[outputBuffer.remaining()];
		outputBuffer.get(outputData);
		System.out.println(new String(outputData, ISO_8859_1));
	}

/*
 * Reference table for the windows-1250 code page, one entry per line as
 * "byte (hex) = Unicode code point : character name".
 * Bytes 81, 83, 88, 90 and 98 have no entry in this table.
 *
00 = U+0000 : NULL
01 = U+0001 : START OF HEADING
02 = U+0002 : START OF TEXT
03 = U+0003 : END OF TEXT
04 = U+0004 : END OF TRANSMISSION
05 = U+0005 : ENQUIRY
06 = U+0006 : ACKNOWLEDGE
07 = U+0007 : BELL
08 = U+0008 : BACKSPACE
09 = U+0009 : HORIZONTAL TABULATION
0A = U+000A : LINE FEED
0B = U+000B : VERTICAL TABULATION
0C = U+000C : FORM FEED
0D = U+000D : CARRIAGE RETURN
0E = U+000E : SHIFT OUT
0F = U+000F : SHIFT IN
10 = U+0010 : DATA LINK ESCAPE
11 = U+0011 : DEVICE CONTROL ONE
12 = U+0012 : DEVICE CONTROL TWO
13 = U+0013 : DEVICE CONTROL THREE
14 = U+0014 : DEVICE CONTROL FOUR
15 = U+0015 : NEGATIVE ACKNOWLEDGE
16 = U+0016 : SYNCHRONOUS IDLE
17 = U+0017 : END OF TRANSMISSION BLOCK
18 = U+0018 : CANCEL
19 = U+0019 : END OF MEDIUM
1A = U+001A : SUBSTITUTE
1B = U+001B : ESCAPE
1C = U+001C : FILE SEPARATOR
1D = U+001D : GROUP SEPARATOR
1E = U+001E : RECORD SEPARATOR
1F = U+001F : UNIT SEPARATOR
20 = U+0020 : SPACE
21 = U+0021 : EXCLAMATION MARK
22 = U+0022 : QUOTATION MARK
23 = U+0023 : NUMBER SIGN
24 = U+0024 : DOLLAR SIGN
25 = U+0025 : PERCENT SIGN
26 = U+0026 : AMPERSAND
27 = U+0027 : APOSTROPHE
28 = U+0028 : LEFT PARENTHESIS
29 = U+0029 : RIGHT PARENTHESIS
2A = U+002A : ASTERISK
2B = U+002B : PLUS SIGN
2C = U+002C : COMMA
2D = U+002D : HYPHEN-MINUS
2E = U+002E : FULL STOP
2F = U+002F : SOLIDUS
30 = U+0030 : DIGIT ZERO
31 = U+0031 : DIGIT ONE
32 = U+0032 : DIGIT TWO
33 = U+0033 : DIGIT THREE
34 = U+0034 : DIGIT FOUR
35 = U+0035 : DIGIT FIVE
36 = U+0036 : DIGIT SIX
37 = U+0037 : DIGIT SEVEN
38 = U+0038 : DIGIT EIGHT
39 = U+0039 : DIGIT NINE
3A = U+003A : COLON
3B = U+003B : SEMICOLON
3C = U+003C : LESS-THAN SIGN
3D = U+003D : EQUALS SIGN
3E = U+003E : GREATER-THAN SIGN
3F = U+003F : QUESTION MARK
40 = U+0040 : COMMERCIAL AT
41 = U+0041 : LATIN CAPITAL LETTER A
42 = U+0042 : LATIN CAPITAL LETTER B
43 = U+0043 : LATIN CAPITAL LETTER C
44 = U+0044 : LATIN CAPITAL LETTER D
45 = U+0045 : LATIN CAPITAL LETTER E
46 = U+0046 : LATIN CAPITAL LETTER F
47 = U+0047 : LATIN CAPITAL LETTER G
48 = U+0048 : LATIN CAPITAL LETTER H
49 = U+0049 : LATIN CAPITAL LETTER I
4A = U+004A : LATIN CAPITAL LETTER J
4B = U+004B : LATIN CAPITAL LETTER K
4C = U+004C : LATIN CAPITAL LETTER L
4D = U+004D : LATIN CAPITAL LETTER M
4E = U+004E : LATIN CAPITAL LETTER N
4F = U+004F : LATIN CAPITAL LETTER O
50 = U+0050 : LATIN CAPITAL LETTER P
51 = U+0051 : LATIN CAPITAL LETTER Q
52 = U+0052 : LATIN CAPITAL LETTER R
53 = U+0053 : LATIN CAPITAL LETTER S
54 = U+0054 : LATIN CAPITAL LETTER T
55 = U+0055 : LATIN CAPITAL LETTER U
56 = U+0056 : LATIN CAPITAL LETTER V
57 = U+0057 : LATIN CAPITAL LETTER W
58 = U+0058 : LATIN CAPITAL LETTER X
59 = U+0059 : LATIN CAPITAL LETTER Y
5A = U+005A : LATIN CAPITAL LETTER Z
5B = U+005B : LEFT SQUARE BRACKET
5C = U+005C : REVERSE SOLIDUS
5D = U+005D : RIGHT SQUARE BRACKET
5E = U+005E : CIRCUMFLEX ACCENT
5F = U+005F : LOW LINE
60 = U+0060 : GRAVE ACCENT
61 = U+0061 : LATIN SMALL LETTER A
62 = U+0062 : LATIN SMALL LETTER B
63 = U+0063 : LATIN SMALL LETTER C
64 = U+0064 : LATIN SMALL LETTER D
65 = U+0065 : LATIN SMALL LETTER E
66 = U+0066 : LATIN SMALL LETTER F
67 = U+0067 : LATIN SMALL LETTER G
68 = U+0068 : LATIN SMALL LETTER H
69 = U+0069 : LATIN SMALL LETTER I
6A = U+006A : LATIN SMALL LETTER J
6B = U+006B : LATIN SMALL LETTER K
6C = U+006C : LATIN SMALL LETTER L
6D = U+006D : LATIN SMALL LETTER M
6E = U+006E : LATIN SMALL LETTER N
6F = U+006F : LATIN SMALL LETTER O
70 = U+0070 : LATIN SMALL LETTER P
71 = U+0071 : LATIN SMALL LETTER Q
72 = U+0072 : LATIN SMALL LETTER R
73 = U+0073 : LATIN SMALL LETTER S
74 = U+0074 : LATIN SMALL LETTER T
75 = U+0075 : LATIN SMALL LETTER U
76 = U+0076 : LATIN SMALL LETTER V
77 = U+0077 : LATIN SMALL LETTER W
78 = U+0078 : LATIN SMALL LETTER X
79 = U+0079 : LATIN SMALL LETTER Y
7A = U+007A : LATIN SMALL LETTER Z
7B = U+007B : LEFT CURLY BRACKET
7C = U+007C : VERTICAL LINE
7D = U+007D : RIGHT CURLY BRACKET
7E = U+007E : TILDE
7F = U+007F : DELETE
80 = U+20AC : EURO SIGN
82 = U+201A : SINGLE LOW-9 QUOTATION MARK
84 = U+201E : DOUBLE LOW-9 QUOTATION MARK
85 = U+2026 : HORIZONTAL ELLIPSIS
86 = U+2020 : DAGGER
87 = U+2021 : DOUBLE DAGGER
89 = U+2030 : PER MILLE SIGN
8A = U+0160 : LATIN CAPITAL LETTER S WITH CARON
8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK
8C = U+015A : LATIN CAPITAL LETTER S WITH ACUTE
8D = U+0164 : LATIN CAPITAL LETTER T WITH CARON
8E = U+017D : LATIN CAPITAL LETTER Z WITH CARON
8F = U+0179 : LATIN CAPITAL LETTER Z WITH ACUTE
91 = U+2018 : LEFT SINGLE QUOTATION MARK
92 = U+2019 : RIGHT SINGLE QUOTATION MARK
93 = U+201C : LEFT DOUBLE QUOTATION MARK
94 = U+201D : RIGHT DOUBLE QUOTATION MARK
95 = U+2022 : BULLET
96 = U+2013 : EN DASH
97 = U+2014 : EM DASH
99 = U+2122 : TRADE MARK SIGN
9A = U+0161 : LATIN SMALL LETTER S WITH CARON
9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
9C = U+015B : LATIN SMALL LETTER S WITH ACUTE
9D = U+0165 : LATIN SMALL LETTER T WITH CARON
9E = U+017E : LATIN SMALL LETTER Z WITH CARON
9F = U+017A : LATIN SMALL LETTER Z WITH ACUTE
A0 = U+00A0 : NO-BREAK SPACE
A1 = U+02C7 : CARON
A2 = U+02D8 : BREVE
A3 = U+0141 : LATIN CAPITAL LETTER L WITH STROKE
A4 = U+00A4 : CURRENCY SIGN
A5 = U+0104 : LATIN CAPITAL LETTER A WITH OGONEK
A6 = U+00A6 : BROKEN BAR
A7 = U+00A7 : SECTION SIGN
A8 = U+00A8 : DIAERESIS
A9 = U+00A9 : COPYRIGHT SIGN
AA = U+015E : LATIN CAPITAL LETTER S WITH CEDILLA
AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
AC = U+00AC : NOT SIGN
AD = U+00AD : SOFT HYPHEN
AE = U+00AE : REGISTERED SIGN
AF = U+017B : LATIN CAPITAL LETTER Z WITH DOT ABOVE
B0 = U+00B0 : DEGREE SIGN
B1 = U+00B1 : PLUS-MINUS SIGN
B2 = U+02DB : OGONEK
B3 = U+0142 : LATIN SMALL LETTER L WITH STROKE
B4 = U+00B4 : ACUTE ACCENT
B5 = U+00B5 : MICRO SIGN
B6 = U+00B6 : PILCROW SIGN
B7 = U+00B7 : MIDDLE DOT
B8 = U+00B8 : CEDILLA
B9 = U+0105 : LATIN SMALL LETTER A WITH OGONEK
BA = U+015F : LATIN SMALL LETTER S WITH CEDILLA
BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
BC = U+013D : LATIN CAPITAL LETTER L WITH CARON
BD = U+02DD : DOUBLE ACUTE ACCENT
BE = U+013E : LATIN SMALL LETTER L WITH CARON
BF = U+017C : LATIN SMALL LETTER Z WITH DOT ABOVE
C0 = U+0154 : LATIN CAPITAL LETTER R WITH ACUTE
C1 = U+00C1 : LATIN CAPITAL LETTER A WITH ACUTE
C2 = U+00C2 : LATIN CAPITAL LETTER A WITH CIRCUMFLEX
C3 = U+0102 : LATIN CAPITAL LETTER A WITH BREVE
C4 = U+00C4 : LATIN CAPITAL LETTER A WITH DIAERESIS
C5 = U+0139 : LATIN CAPITAL LETTER L WITH ACUTE
C6 = U+0106 : LATIN CAPITAL LETTER C WITH ACUTE
C7 = U+00C7 : LATIN CAPITAL LETTER C WITH CEDILLA
C8 = U+010C : LATIN CAPITAL LETTER C WITH CARON
C9 = U+00C9 : LATIN CAPITAL LETTER E WITH ACUTE
CA = U+0118 : LATIN CAPITAL LETTER E WITH OGONEK
CB = U+00CB : LATIN CAPITAL LETTER E WITH DIAERESIS
CC = U+011A : LATIN CAPITAL LETTER E WITH CARON
CD = U+00CD : LATIN CAPITAL LETTER I WITH ACUTE
CE = U+00CE : LATIN CAPITAL LETTER I WITH CIRCUMFLEX
CF = U+010E : LATIN CAPITAL LETTER D WITH CARON
D0 = U+0110 : LATIN CAPITAL LETTER D WITH STROKE
D1 = U+0143 : LATIN CAPITAL LETTER N WITH ACUTE
D2 = U+0147 : LATIN CAPITAL LETTER N WITH CARON
D3 = U+00D3 : LATIN CAPITAL LETTER O WITH ACUTE
D4 = U+00D4 : LATIN CAPITAL LETTER O WITH CIRCUMFLEX
D5 = U+0150 : LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
D6 = U+00D6 : LATIN CAPITAL LETTER O WITH DIAERESIS
D7 = U+00D7 : MULTIPLICATION SIGN
D8 = U+0158 : LATIN CAPITAL LETTER R WITH CARON
D9 = U+016E : LATIN CAPITAL LETTER U WITH RING ABOVE
DA = U+00DA : LATIN CAPITAL LETTER U WITH ACUTE
DB = U+0170 : LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
DC = U+00DC : LATIN CAPITAL LETTER U WITH DIAERESIS
DD = U+00DD : LATIN CAPITAL LETTER Y WITH ACUTE
DE = U+0162 : LATIN CAPITAL LETTER T WITH CEDILLA
DF = U+00DF : LATIN SMALL LETTER SHARP S
E0 = U+0155 : LATIN SMALL LETTER R WITH ACUTE
E1 = U+00E1 : LATIN SMALL LETTER A WITH ACUTE
E2 = U+00E2 : LATIN SMALL LETTER A WITH CIRCUMFLEX
E3 = U+0103 : LATIN SMALL LETTER A WITH BREVE
E4 = U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
E5 = U+013A : LATIN SMALL LETTER L WITH ACUTE
E6 = U+0107 : LATIN SMALL LETTER C WITH ACUTE
E7 = U+00E7 : LATIN SMALL LETTER C WITH CEDILLA
E8 = U+010D : LATIN SMALL LETTER C WITH CARON
E9 = U+00E9 : LATIN SMALL LETTER E WITH ACUTE
EA = U+0119 : LATIN SMALL LETTER E WITH OGONEK
EB = U+00EB : LATIN SMALL LETTER E WITH DIAERESIS
EC = U+011B : LATIN SMALL LETTER E WITH CARON
ED = U+00ED : LATIN SMALL LETTER I WITH ACUTE
EE = U+00EE : LATIN SMALL LETTER I WITH CIRCUMFLEX
EF = U+010F : LATIN SMALL LETTER D WITH CARON
F0 = U+0111 : LATIN SMALL LETTER D WITH STROKE
F1 = U+0144 : LATIN SMALL LETTER N WITH ACUTE
F2 = U+0148 : LATIN SMALL LETTER N WITH CARON
F3 = U+00F3 : LATIN SMALL LETTER O WITH ACUTE
F4 = U+00F4 : LATIN SMALL LETTER O WITH CIRCUMFLEX
F5 = U+0151 : LATIN SMALL LETTER O WITH DOUBLE ACUTE
F6 = U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
F7 = U+00F7 : DIVISION SIGN
F8 = U+0159 : LATIN SMALL LETTER R WITH CARON
F9 = U+016F : LATIN SMALL LETTER U WITH RING ABOVE
FA = U+00FA : LATIN SMALL LETTER U WITH ACUTE
FB = U+0171 : LATIN SMALL LETTER U WITH DOUBLE ACUTE
FC = U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
FD = U+00FD : LATIN SMALL LETTER Y WITH ACUTE
FE = U+0163 : LATIN SMALL LETTER T WITH CEDILLA
FF = U+02D9 : DOT ABOVE
*/

}
